In [1]:
import pandas as pd
import numpy as np
import datetime
from datetime import date
from dateutil.rrule import rrule, DAILY
from __future__ import division
import geoplotlib as glp
from geoplotlib.utils import BoundingBox, DataAccessObject
# Do not truncate wide DataFrames: show every column when a frame is displayed.
pd.set_option('display.max_columns', None)
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
In [2]:
# Load the weather observations and the collision records from CSV.
weather = pd.read_csv('datasets/weather_data_nyc_kjfk_clean2.csv')
incidents = pd.read_csv("datasets/NYPD_Motor_Vehicle_Collisions_weather4.csv")

# Build a "Year/Month/Day/Hour" string key for every weather row; it is used
# below to identify distinct timestamps.
weather['date'] = (
    weather['Year'].astype('str') + '/'
    + weather['Month'].astype('str') + '/'
    + weather['Day'].astype('str') + '/'
    + weather['Hour'].astype('str')
)
Frequency of measured weather conditions from 7/1/2012 to 3/1/2016, on an hourly basis.
In [3]:
# Count, for each weather condition, the number of distinct timestamps (the
# `date` key) on which it was the recorded condition.

# Start every condition that appears in the weather data at zero.
conditions = list(weather.Conditions.unique())
condic = dict.fromkeys(conditions, 0)

# Keep only the first row per timestamp (drop_duplicates keeps the first
# occurrence by default) and tally the conditions in one pass, instead of
# re-scanning the whole frame once per timestamp.
# value_counts() skips missing values, so rows without a condition are not counted.
first_per_timestamp = weather.drop_duplicates(subset='date')['Conditions']
for cond, hours in first_per_timestamp.value_counts().to_dict().items():
    condic[cond] = int(hours)
condic
Out[3]:
In [4]:
# Count the collisions recorded under each weather condition.
# value_counts() counts *rows* per distinct value and skips missing (NaN)
# conditions. Note that DataFrame.size must not be used for this: it returns
# rows x columns, which inflates every count by the number of columns.
conditionCount = {
    cond: int(n_collisions)
    for cond, n_collisions in incidents.Conditions.value_counts().to_dict().items()
}
conditionCount
Out[4]:
In [8]:
# Collisions per observed hour for each weather condition: the number of
# collisions recorded under the condition divided by the number of timestamps
# on which that condition was measured.
# Relies on true division (`from __future__ import division` in the import cell).
# .items() works on both Python 2 and Python 3 (.iteritems() is Python 2 only).
ratios = {}
for cond, collisions in conditionCount.items():
    ratios[cond] = collisions / condic[cond]

# Normalize on Mostly Cloudy (Most common weather condition): express every
# ratio as a percentage of the Mostly Cloudy ratio, so Mostly Cloudy becomes 100.
reference = ratios["Mostly Cloudy"]
ratios = {cond: (ratio / reference) * 100 for cond, ratio in ratios.items()}
ratios
Out[8]:
In [9]:
# Horizontal bar chart of the normalized ratios, sorted in ascending order.
df = (
    pd.Series(ratios, name="Collision Frequency (Normalized)")
    .sort_values()
    .to_frame()
)
df.plot.barh(figsize=(8, 8))
Out[9]:
In [7]:
# Write the normalized ratios to a JSON file for the d3 visualization.
from collections import OrderedDict
import json

# Rank conditions from the highest to the lowest ratio; the OrderedDict makes
# json.dump emit the keys in that order.
ranked = sorted(ratios.items(), key=lambda item: item[1], reverse=True)
with open('datasets/freq_weather2.json', 'w') as fp:
    json.dump(OrderedDict(ranked), fp)
In [ ]: